Load data
suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(HoneyBADGER))
sce <- readRDS("../../manuscript_analysis2_0629/Rds/fresh_secretory.rds")
scater::plotTSNE(sce, colour_by = "final.clusters")

table(sce$Patient)
##
## 11543L 11543R 11545L 11545R 11553L 11553R 15066R 15072L 15072R
## 86 101 250 175 190 192 247 231 12
sce2 <- sce[,sce$Patient %in% c("11543L")]
table(sce2$final.clusters)
##
## 1 2 3 4
## 6 19 61 0
# 11528L.2
# 46
refFT <- rowMeans(logcounts(sce)[, sce$final.clusters == 3])
ft <- logcounts(sce2)
ft[1:5,1:5]
## 11543L-p1-I19 11543L-p1-F19 11543L-p1-E21 11543L-p1-I24
## WASH7P 0.0000000 0 0 0
## LOC729737 0.5299579 0 0 0
## LOC100133331 0.0000000 0 0 0
## LOC100288069 0.0000000 0 0 0
## LINC00115 0.0000000 0 0 0
## 11543L-p1-A24
## WASH7P 0
## LOC729737 0
## LOC100133331 0
## LOC100288069 0
## LINC00115 0
keep <- rowSums(ft > 1) > 5
refFT <- refFT[keep]
ft <- ft[keep,]
# ft <- ft[,1:100]
rm(keep)
dim(ft)
## [1] 9535 86
require(biomaRt) ## for gene coordinates
## Loading required package: biomaRt
mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = 'hsapiens_gene_ensembl', host = "jul2015.archive.ensembl.org")
hb <- new('HoneyBADGER', name='11543L')
hb$setGexpMats(ft, refFT, mart.obj, filter=F, scale=F, verbose=TRUE)
## Initializing expression matrices ...
## Normalizing gene expression for 9535 genes and 86 cells ...
## Done setting initial expression matrices!
hb$plotGexpProfile() ## initial visualization

11543R
sce2 <- sce[,sce$Patient %in% c("11543R")]
refFT <- rowMeans(logcounts(sce))
ft <- logcounts(sce2)
ft[1:5,1:5]
## 11543R-p1-K09 11543R-p1-K12 11543R-p1-A15 11543R-p1-B20
## WASH7P 0.0000000 0 0 0
## LOC729737 0.0000000 0 0 0
## LOC100133331 0.0000000 0 0 0
## LOC100288069 0.3242997 0 0 0
## LINC00115 0.0000000 0 0 0
## 11543R-p1-J21
## WASH7P 0
## LOC729737 0
## LOC100133331 0
## LOC100288069 0
## LINC00115 0
keep <- rowSums(ft > 1) > 5
refFT <- refFT[keep]
ft <- ft[keep,]
# ft <- ft[,1:100]
rm(keep)
dim(ft)
## [1] 7524 101
hb <- new('HoneyBADGER', name='11543L')
hb$setGexpMats(ft, refFT, mart.obj, filter=F, scale=T, verbose=TRUE)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 7524 genes and 101 cells ...
## Done setting initial expression matrices!
hb$plotGexpProfile() ## initial visualization

11545
rm(hb)
sce2 <- sce[,sce$Patient %in% c("11545L")]
refFT <- rowMeans(logcounts(sce))
ft <- logcounts(sce2)
ft[1:5,1:5]
## 11545L-p1-L02 11545L-p1-I04 11545L-p1-E09 11545L-p1-A11
## WASH7P 0 0 0 0
## LOC729737 0 0 0 0
## LOC100133331 0 0 0 0
## LOC100288069 0 0 0 0
## LINC00115 0 0 0 0
## 11545L-p1-D14
## WASH7P 0
## LOC729737 0
## LOC100133331 0
## LOC100288069 0
## LINC00115 0
keep <- rowSums(ft > 1) > 5
refFT <- refFT[keep]
ft <- ft[keep,]
# ft <- ft[,1:100]
rm(keep)
dim(ft)
## [1] 11826 250
ft <- ft[,order(sce2$final.clusters, decreasing = F)]
hb <- new('HoneyBADGER', name='11543L')
hb$setGexpMats(ft, refFT, mart.obj, filter=F, scale=T, verbose=TRUE)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 11826 genes and 250 cells ...
## Done setting initial expression matrices!
hb$plotGexpProfile() ## initial visualization

sce2 <- sce[,sce$Patient %in% c("11545R")]
refFT <- rowMeans(logcounts(sce)[, sce$final.clusters == 3])
ft <- logcounts(sce2)
keep <- rowSums(ft > 1) > 5
refFT <- refFT[keep]
ft <- ft[keep,]
rm(keep)
dim(ft)
## [1] 10946 175
ft <- ft[,order(sce2$final.clusters)]
hb <- new('HoneyBADGER', name='11543L')
hb$setGexpMats(ft, refFT, mart.obj, filter=F, scale=T, verbose=TRUE)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 10946 genes and 175 cells ...
## Done setting initial expression matrices!
hb$plotGexpProfile() ## initial visualization

11553
rm(hb)
sce2 <- sce[,sce$Patient2 %in% c("11553")]
refFT <- rowMeans(logcounts(sce)[, sce$final.clusters == 3])
ft <- logcounts(sce2)
keep <- (rowSums(ft > 1) > (ncol(ft)/10)) | (refFT > 0.5)
refFT <- refFT[keep]
ft <- ft[keep,]
rm(keep)
dim(ft)
## [1] 7685 382
ft <- ft[,order(paste(sce2$Patient,sce2$final.clusters))]
hb <- new('HoneyBADGER', name='11543L')
hb$setGexpMats(ft, refFT, mart.obj, filter=F, scale=T, verbose=TRUE)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 7685 genes and 382 cells ...
## Done setting initial expression matrices!
dim(hb$gexp.sc)
## [1] 7685 382
hb$plotGexpProfile() ## initial visualization

15066
rm(hb)
sce2 <- sce[,sce$Patient2 %in% c("15066")]
refFT <- rowMeans(logcounts(sce)[, sce$final.clusters == 3])
ft <- logcounts(sce2)
keep <- (rowSums(ft > 1) > (ncol(ft)/10)) | (refFT > 0.5)
refFT <- refFT[keep]
ft <- ft[keep,]
rm(keep)
dim(ft)
## [1] 8755 247
ft <- ft[,order(paste(sce2$Patient,sce2$final.clusters))]
hb <- new('HoneyBADGER', name='11543L')
hb$setGexpMats(ft, refFT, mart.obj, filter=F, scale=T, verbose=TRUE)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 8755 genes and 247 cells ...
## Done setting initial expression matrices!
dim(hb$gexp.sc)
## [1] 8755 247
hb$plotGexpProfile() ## initial visualization

15072
rm(hb)
sce2 <- sce[,sce$Patient2 %in% c("15072")]
refFT <- rowMeans(logcounts(sce)[, sce$final.clusters == 3])
ft <- logcounts(sce2)
keep <- (rowSums(ft > 1) > (ncol(ft)/10)) | (refFT > 0.5)
refFT <- refFT[keep]
ft <- ft[keep,]
rm(keep)
dim(ft)
## [1] 6555 243
ft <- ft[,order(paste(sce2$Patient,sce2$final.clusters))]
hb <- new('HoneyBADGER', name='11543L')
hb$setGexpMats(ft, refFT, mart.obj, filter=F, scale=T, verbose=TRUE)
## Initializing expression matrices ...
## Scaling coverage ...
## Normalizing gene expression for 6555 genes and 243 cells ...
## Done setting initial expression matrices!
hb$plotGexpProfile() ## initial visualization
